# Direct outputs eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

options(
    scipen = 999,
    digits = 16,
    max.print = .Machine$integer.max,
    show.error.locations = TRUE,
    warn = 1
)

RNGkind("L'Ecuyer-CMRG")
seed <- 818675309L
set.seed(seed) # setting main seed

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(ggplot2)
library(scales)

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
setDTthreads(threads = 1L)
options(datatable.print.class = TRUE, datatable.print.keys = TRUE)
# so that printing the data.table also shows the variable type on top

# BEGIN FILE -------------------------------------------------------------------

# Read in estimation results, and calculate cohort-weighted pre-win means
ReadFigB4Results <- function(outcome) {
    results_list <-
        readRDS(
            sprintf("~/estimation-output/event_study_estimates_%s.rds", outcome)
        )

    did_coefs <- setDT(results_list[[1]])

    did_coefs <-
        did_coefs[
            between(ref_event_time, -7L, 5L, incbounds = TRUE) &
                ref_onset_time == "Cohort-Weighted" &
                model == "reduced_form" &
                rn == "att",
            .(
                ref_event_time,
                estimate,
                cluster_se
            )
        ]

    cohort_weights <- setDT(results_list[[1]])
    cohort_weights <-
        cohort_weights[
            between(ref_event_time, -7L, 5L, incbounds = TRUE) &
                model == "reduced_form" &
                rn == "catt",
            .(
                ref_onset_time,
                ref_event_time,
                catt_treated_unique_units
            )
        ]

    treat_means <-
        rbindlist(
            list(results_list[[3]][[1]], results_list[[3]][[2]]),
            use.names = TRUE
        )

    treat_means <-
        treat_means[
            between(ref_event_time, -7L, 5L, incbounds = TRUE) &
                model == "reduced_form",
            .(ref_onset_time, ref_event_time, model, treat_mean_outcome)
        ]
    treat_means[, ref_onset_time := as.character(ref_onset_time)]

    # Merge in cohort weights to treat_means so as
    # to construct cohort-weighted versions of the pre-win means
    treat_means <-
        merge(treat_means,
            cohort_weights,
            by = c(
                "ref_onset_time",
                "ref_event_time"
            ),
            all.x = TRUE,
            sort = FALSE
        )

    cw_treat_means <-
        treat_means[
            ,
            .(
                ref_onset_time = "Cohort-Weighted",
                cw_treat_mean =
                    weighted.mean(
                        x = treat_mean_outcome,
                        w = (
                            catt_treated_unique_units / sum(catt_treated_unique_units) # nolint
                        )
                    )
            ),
            by = .(ref_event_time)
        ]

    did_coefs <-
        merge(
            did_coefs,
            cw_treat_means,
            by = c("ref_event_time"),
            all.x = TRUE,
            sort = FALSE
        )

    # Use the event time representing the most units
    did_coefs[
        ,
        pre_win_mean :=
            sum(
                as.integer(
                    ref_onset_time == "Cohort-Weighted" & ref_event_time == 0
                ) * cw_treat_mean
            )
    ]

    did_coefs[, c("ref_onset_time", "cw_treat_mean") := NULL]
    setnames(did_coefs, "ref_event_time", "event_time")

    # Introduce a omitted_event_time row
    did_coefs <-
        rbindlist(
            list(
                did_coefs,
                data.table(
                    event_time = -2L,
                    estimate = 0,
                    cluster_se = 0
                )
            ),
            use.names = TRUE,
            fill = TRUE
        )
    did_coefs[, dependent_var := outcome]
    setorderv(did_coefs, "event_time")

    # Housekeeping
    results_list <- NULL
    cohort_weights <- NULL
    treat_means <- NULL
    cw_treat_means <- NULL
    rm(results_list, cohort_weights, treat_means, cw_treat_means)

    return(did_coefs)
}

outcomes <-
    c(
        "per_adult_capital_income",
        "per_adult_tot_int",
        "per_adult_div",
        "per_adult_pension",
        "per_adult_add_to_othinc",
        "per_adult_rentinc"
    )

estimates <- rbindlist(lapply(outcomes, ReadFigB4Results), use.names = TRUE)

# Label each series
estimates[, label_var := as.character(NA)]

estimates[
    dependent_var == "per_adult_capital_income",
    label_var := "Per-Adult Capital Income"
]

estimates[
    dependent_var == "per_adult_tot_int",
    label_var := "Interest"
]

estimates[
    dependent_var == "per_adult_div",
    label_var := "Dividends"
]

estimates[
    dependent_var == "per_adult_pension",
    label_var := "Pensions"
]

estimates[
    dependent_var == "per_adult_add_to_othinc",
    label_var := "Estate, Trust, and Other Capital Income"
]

estimates[
    dependent_var == "per_adult_rentinc",
    label_var := "Rent and Royalty Income"
]

estimates[
    ,
    label_var :=
    factor(
        label_var,
        levels =
        c(
             "Per-Adult Capital Income",
             "Interest",
             "Dividends",
             "Pensions",
             "Estate, Trust, and Other Capital Income",
             "Rent and Royalty Income"
        )
    )
]
# Figure B.4 -- decomposing capital income response

figure_b_4 <-
    ggplot(
        data = estimates,
        aes(x = event_time, y = estimate, color = factor(label_var))
    ) +
    geom_line() +
    geom_ribbon(
        aes(
            ymin = (estimate - (1.64 * cluster_se)),
            ymax = (estimate + (1.64 * cluster_se)),
            fill = factor(label_var)
        ),
        linetype = 0,
        alpha = 0.2,
        show.legend = FALSE
    ) +
    theme_bw(base_size = 13) +
    theme(panel.grid.minor = element_blank()) +
    scale_x_continuous(
        breaks = seq.int(from = -7L, to = 5L, by = 1L),
        expand = expansion(mult = c(0.0025, 0.0025))
    ) +
    scale_y_continuous(breaks = breaks_extended(n = 10)) +
    labs(x = "Event Time (ℓ)", y = "Event Study Estimate (2016 USD)") +
    theme(
        legend.title = element_blank(),
        legend.position = c(0.2775, 0.7775),
        legend.background = element_rect(fill = "white", color = "grey"),
        strip.text.x = element_blank(),
        strip.background = element_blank(),
        legend.key = element_rect(NA),
        legend.spacing.y = unit(2, "mm"),
        legend.key.height = unit(4, "mm"),
        legend.key.width = unit(4, "mm"),
        legend.margin = margin(t = 0, r = 0.1, b = 0.15, l = 0.1, unit = "cm")
    ) +
    scale_color_viridis_d( ) +
    scale_fill_viridis_d( ) +
    guides(color = guide_legend(ncol = 1, byrow = TRUE)) +
    coord_cartesian(ylim = c(-250, 2000))

ggsave(
    plot = figure_b_4,
    filename = "~/paper/figures/figure-B.4.png",
    width = 6,
    height = 4,
    dpi = 600
)

ggsave(
    plot = figure_b_4,
    filename = "~/paper/figures/figure-B.4.tif",
    width = 6,
    height = 4,
    device = "tiff",
    dpi = 600
)

# Housekeeping
outcomes <- NULL
estimates <- NULL
figure_b_4 <- NULL
rm(
    outcomes,
    estimates,
    figure_b_4
)